import requests
from bs4 import BeautifulSoup
import pandas as pd
def bookschina(page, timeout=10):
    """Scrape one page of bookschina.com search results for "Python".

    Args:
        page: Page number substituted into the ``p`` query parameter of the
            search URL.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it a stalled connection
            would block forever.

    Returns:
        A ``pandas.DataFrame`` with one row per book and the columns
        书名 (the ``title`` attribute of the name link), 出版时间, 出版社,
        定价 and 售价 (element text). The columns are present even when the
        page yields no books.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    columns = ['书名', '出版时间', '出版社', '定价', '售价']
    url = f'https://www.bookschina.com/book_find2/default.aspx?stp=Python&scate=0&f=1&sort=0&asc=0&sh=0&so=1&p={page}&pb=1'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'}
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on an error response instead of parsing an error page and
    # silently returning an empty table.
    response.raise_for_status()
    # Let requests guess the charset from the body rather than trusting the
    # HTTP header.
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, 'lxml')

    names = soup.select('h2.name > a')
    dates = soup.select('span.pulishTiem')
    presses = soup.select('a.publisher')
    prices = soup.select('div.bookList div.priceWrap > del')
    sales = soup.select('div.bookList div.priceWrap > span.sellPrice')

    # NOTE: the five selectors are matched independently across the whole
    # page, so zip pairs them purely by position and stops at the shortest
    # list; a book that lacks one of the fields would shift the rows after it.
    data = [
        {
            '书名': name.get('title'),
            '出版时间': date.get_text(),
            '出版社': press.get_text(),
            '定价': price.get_text(),
            '售价': sale.get_text(),
        }
        for name, date, press, price, sale in zip(names, dates, presses, prices, sales)
    ]
    # Passing the column list keeps the schema stable for an empty page.
    return pd.DataFrame(data, columns=columns)
# Scrape result pages 1 through 3 and stack the per-page tables into one
# frame with a fresh 0..n-1 index.
all_data = pd.concat(
    [bookschina(page_no) for page_no in range(1, 4)],
    ignore_index=True,
)
# Save the combined table as an Excel workbook, omitting the index column.
all_data.to_excel('图书数据(多页).xlsx', index=False)
